Scratch work

Code
# IMPORT RELEVANT LIBRARIES
import numpy as np
import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
import numpy as np
import json
import requests

from pathlib import Path
Code
# import the csv
dc_health_df = pd.read_csv('./PLACES__Census_Tract_Data__GIS_Friendly_Format___2022_release (1).csv')

# filter for where StateAbbr = DC
dc_health_df = dc_health_df[dc_health_df['StateAbbr'] == 'DC']
dc_health_df.head()
StateAbbr StateDesc CountyName CountyFIPS TractFIPS TotalPopulation ACCESS2_CrudePrev ACCESS2_Crude95CI ARTHRITIS_CrudePrev ARTHRITIS_Crude95CI ... OBESITY_Crude95CI PHLTH_CrudePrev PHLTH_Crude95CI SLEEP_CrudePrev SLEEP_Crude95CI STROKE_CrudePrev STROKE_Crude95CI TEETHLOST_CrudePrev TEETHLOST_Crude95CI Geolocation
8871 DC District of Columbia District of Columbia 11001 11001007604 3644 9.0 ( 7.0, 11.3) 24.3 (22.9, 25.7) ... (30.2, 34.0) 10.4 ( 9.2, 11.7) 39.0 (37.0, 40.8) 4.8 ( 4.2, 5.5) 15.5 ( 9.8, 22.1) POINT (-76.96379997 38.86610065)
8884 DC District of Columbia District of Columbia 11001 11001005600 6756 5.5 ( 4.4, 7.0) 13.7 (13.0, 14.4) ... (15.6, 18.2) 5.9 ( 5.3, 6.7) 26.2 (24.3, 28.2) 2.0 ( 1.8, 2.2) 7.6 ( 5.1, 10.9) POINT (-77.05385751 38.90021244)
8919 DC District of Columbia District of Columbia 11001 11001008402 2149 5.2 ( 4.1, 6.6) 15.9 (15.2, 16.8) ... (19.8, 22.0) 5.7 ( 5.1, 6.4) 28.8 (27.5, 30.3) 2.4 ( 2.1, 2.8) 7.2 ( 4.1, 14.4) POINT (-76.9910702 38.89861357)
9058 DC District of Columbia District of Columbia 11001 11001001100 4779 3.3 ( 3.0, 3.8) 18.8 (18.1, 19.6) ... (16.0, 17.5) 5.2 ( 4.9, 5.7) 23.6 (22.8, 24.4) 2.0 ( 1.9, 2.2) 3.4 ( 2.6, 5.2) POINT (-77.07767045 38.95722636)
9075 DC District of Columbia District of Columbia 11001 11001008002 3031 4.7 ( 3.8, 6.1) 13.4 (12.7, 14.2) ... (19.4, 21.8) 4.9 ( 4.4, 5.6) 28.9 (27.6, 30.5) 1.8 ( 1.6, 2.0) 5.6 ( 3.3, 9.7) POINT (-76.98275018 38.89154628)

5 rows × 67 columns

Code
# Resetting defaults and import plotly libraries
import plotly.io as pio
pio.renderers.default = "browser"
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"

# isolate only columns with CrudePrev in the name
dc_health_df_prev = dc_health_df.filter(regex='CrudePrev')
df = dc_health_df_prev

# Rename columns
df = df.rename(columns={'ACCESS2_CrudePrev': '% of Adults without Health Insurance', 
                        'ARTHRITIS_CrudePrev': '% of Adults with Arthritis', 
                        'BINGE_CrudePrev': '% of Adults who Binge Drink',
                        'BPHIGH_CrudePrev': '% of Adults with High Blood Pressure',
                        'BPMED_CrudePrev': '% of Adults with High Blood Pressure who take Blood Pressure Medication',
                        'CANCER_CrudePrev': '% of Adults who were Diagnosed with Cancer',
                        'CASTHMA_CrudePrev': '% of Adults who were Diagnosed with Asthma',
                        'CERVICAL_CrudePrev': '% of Women who had a Pap Smear in the Past 3 Years',
                        'CHD_CrudePrev': '% of Adults who were Diagnosed with Coronary Heart Disease',
                        'CHECKUP_CrudePrev': '% of Adults who had a Routine Checkup in the Past Year',
                        'CHOLSCREEN_CrudePrev': '% of Adults who had Cholesterol Checked in the Past 5 Years',
                        'COLON_SCREEN_CrudePrev': '% of Adults who had a Colonoscopy or similar test in the Past 10 Years',
                        'COPD_CrudePrev': '% of Adults who were Diagnosed with COPD (Chronic Obstructive Pulmonary Disease)',
                        'COREM_CrudePrev': '% Prevalence of Older Adult Men aged >=65 years who are up to date on preventative health',
                        'COREW_CrudePrev': '% Prevalence of Older Adult Women aged >=65 years who are up to date on preventative health',
                        'CSMOKING_CrudePrev': '% of Adults who Currently Smoke',
                        'DENTAL_CrudePrev': '% of Adults who had a Dental Visit in the Past Year',
                        'DEPRESSION_CrudePrev': '% of Adults who were Diagnosed with Depression',
                        'DIABETES_CrudePrev': '% of Adults who were Diagnosed with Diabetes',
                        'GHLTH_CrudePrev': '% of Adults who reported their Health as not Good',
                        'HIGHCHOL_CrudePrev': '% of Adults who were Diagnosed with High Cholesterol',
                        'KIDNEY_CrudePrev': '% of Adults who were Diagnosed with Kidney Disease',
                        'LPA_CrudePrev': '% of Adults who are Physically Inactive', 
                        'MAMMOUSE_CrudePrev': '% Women aged 50-74 years who had a Mammogram in the Past 2 Years',
                        'MHLTH_CrudePrev': '% of Adults who reported their Mental Health as not Good',
                        'OBESITY_CrudePrev': '% of Adults who were Obese',
                        'PHLTH_CrudePrev': '% of Adults who reported their Physical Health as not Good',
                        'SLEEP_CrudePrev': '% of Adults who reported their Sleep as not Good',
                        'STROKE_CrudePrev': '% of Adults who were Diagnosed with Stroke',
                        'TEETHLOST_CrudePrev': '% of Adults who have lost all of their Natural Teeth'})

# list of health metrics for drop down menu
column_names = df.columns

# Creating the initial scatter plot
fig = go.Figure(go.Scatter(x=df[column_names[0]], y=df[column_names[1]], mode='markers'))

# Label axes
fig.update_xaxes(title_text='X Axis')
fig.update_yaxes(title_text='Y Axis')

# Setting the range for x and y axes
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(range=[0, 100])

for col in column_names:
    for col2 in column_names:
        fig.add_trace(go.Scatter(x=df[col], y=df[col2], mode='markers', name=col + ' vs ' + col2, visible=False))


# Create a function to update the visibility of the traces based on selected columns
def update_visibility(selected_col, selected_col2):
    for i, trace in enumerate(fig.data):
        trace.visible = (trace.name == selected_col + ' vs ' + selected_col2)

# Create the drop-down menus for x (col) and y (col2) axes of the scatter plot
col_dropdown = [{'label': col, 'value': col} for col in column_names]
col2_dropdown = [{'label': col2, 'value': col2} for col2 in column_names]

# Define the dropdown menu for x-axis
button_layer_1_height = 1.08
x_axis_dropdown = go.layout.Updatemenu(
    buttons=list([dict(args=[{'x': [df[col]]}, update_visibility(col, col2)], label=col, method='update') for col in column_names]),
    direction="down",
    pad={"r": 10, "t": 10},
    showactive=True,
    x=0.06,
    xanchor="left",
    y=button_layer_1_height + 0.05,
    yanchor="top"
)

# Define the dropdown menu for y-axis
y_axis_dropdown = go.layout.Updatemenu(
    buttons=list([dict(args=[{'y': [df[col2]]}, update_visibility(col, col2)], label=col2, method='update') for col2 in column_names]),
    direction="down",
    pad={"r": 10, "t": 10},
    showactive=True,
    x=0.06,
    xanchor="left",
    y=button_layer_1_height,
    yanchor="top"
)

# Update the layout to include the dropdown menus
fig.update_layout(
    updatemenus=[x_axis_dropdown, y_axis_dropdown]
)

# Label axes
fig.update_xaxes(title_text='X Axis')
fig.update_yaxes(title_text='Y Axis')

# Setting the range for x and y axes
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(range=[0, 100])

# Update plot sizing
fig.update_layout(
    width=900,
    height=900,
    autosize=False,
    #margin=dict(t=100, b=0, l=0, r=0),
)

# add annotations
fig.update_layout(
    annotations=[
        dict(
            text="X Axis:",
            x=0,
            xref="paper",
            y=button_layer_1_height + 0.025,
            yref="paper",
            align="left",
            showarrow=False
        ),
        dict(
            text="Y Axis:",
            x=0,
            xref="paper",
            y=button_layer_1_height - 0.025,
            yref="paper",
            align="left",
            showarrow=False
        )
    ]
)

# Create a function to update the visibility of the traces based on selected columns
def update_visibility(selected_col, selected_col2):
    for i, trace in enumerate(fig.data):
        trace.visible = (trace.name == selected_col + ' vs ' + selected_col2)

# Display the scatter plot with dropdown menus
fig.show()

Code
# Import walkability data

df_walk = pd.read_csv('./joined_depression_cre_walkability.csv')
df_walk.head()
geoid_tract_20 census_tract_name depressed_perc black_non_hisp_perc white_non_hisp_perc hispanic_latino_perc 3_plus_cre_risk_factors_perc below_poverty_level_perc no_health_insurance_perc male_perc female_perc income_inequality_gini_index hs_grad_perc households_no_vehicle_perc households_w_internet_perc walkability_score GEOID neighborhood_name
0 11001009907 Census Tract 99.07, District of Columbia, Dist... 21.1 97.5 0.4 2.0 28.85 19.9 7.6 40.0 60.0 0.4280 88.6 54.3 64.4 12.250000 1.100101e+10 MARSHALL HEIGHTS
1 11001008802 Census Tract 88.02, District of Columbia, Dist... 18.3 68.7 25.6 2.7 35.38 19.7 4.3 51.1 48.9 0.4983 86.0 34.2 72.8 13.500000 1.100101e+10 TRINIDAD
2 11001005900 Census Tract 59, District of Columbia, Distric... 22.1 22.4 50.8 8.6 17.12 27.7 4.6 52.6 47.4 0.5047 90.7 61.1 82.6 16.500000 1.100101e+10 CHINATOWN
3 11001002900 Census Tract 29, District of Columbia, Distric... 19.3 19.6 47.1 25.0 13.98 8.2 7.8 50.1 49.9 0.4526 85.6 38.2 88.4 16.166667 1.100100e+10 COLUMBIA HEIGHTS
4 11001003302 Census Tract 33.02, District of Columbia, Dist... 18.5 37.6 48.7 5.2 9.09 9.3 1.4 46.3 53.7 0.3496 97.3 12.4 89.9 15.750000 1.100100e+10 BLOOMINGDALE
Code
print(dc_health_df.shape)
print(df_walk.shape)
(178, 67)
(396, 18)
Code
dc_health_df.rename(columns={'TractFIPS': 'census_tract'}, inplace=True)
df_walk.rename(columns={'geoid_tract_20': 'census_tract'}, inplace=True)
Code
# Merge the two dataframes
df_merged = pd.merge(dc_health_df, df_walk, on='census_tract', how='left')

print(df_merged.columns)
Index(['StateAbbr', 'StateDesc', 'CountyName', 'CountyFIPS', 'census_tract',
       'TotalPopulation', 'ACCESS2_CrudePrev', 'ACCESS2_Crude95CI',
       'ARTHRITIS_CrudePrev', 'ARTHRITIS_Crude95CI', 'BINGE_CrudePrev',
       'BINGE_Crude95CI', 'BPHIGH_CrudePrev', 'BPHIGH_Crude95CI',
       'BPMED_CrudePrev', 'BPMED_Crude95CI', 'CANCER_CrudePrev',
       'CANCER_Crude95CI', 'CASTHMA_CrudePrev', 'CASTHMA_Crude95CI',
       'CERVICAL_CrudePrev', 'CERVICAL_Crude95CI', 'CHD_CrudePrev',
       'CHD_Crude95CI', 'CHECKUP_CrudePrev', 'CHECKUP_Crude95CI',
       'CHOLSCREEN_CrudePrev', 'CHOLSCREEN_Crude95CI',
       'COLON_SCREEN_CrudePrev', 'COLON_SCREEN_Crude95CI', 'COPD_CrudePrev',
       'COPD_Crude95CI', 'COREM_CrudePrev', 'COREM_Crude95CI',
       'COREW_CrudePrev', 'COREW_Crude95CI', 'CSMOKING_CrudePrev',
       'CSMOKING_Crude95CI', 'DENTAL_CrudePrev', 'DENTAL_Crude95CI',
       'DEPRESSION_CrudePrev', 'DEPRESSION_Crude95CI', 'DIABETES_CrudePrev',
       'DIABETES_Crude95CI', 'GHLTH_CrudePrev', 'GHLTH_Crude95CI',
       'HIGHCHOL_CrudePrev', 'HIGHCHOL_Crude95CI', 'KIDNEY_CrudePrev',
       'KIDNEY_Crude95CI', 'LPA_CrudePrev', 'LPA_Crude95CI',
       'MAMMOUSE_CrudePrev', 'MAMMOUSE_Crude95CI', 'MHLTH_CrudePrev',
       'MHLTH_Crude95CI', 'OBESITY_CrudePrev', 'OBESITY_Crude95CI',
       'PHLTH_CrudePrev', 'PHLTH_Crude95CI', 'SLEEP_CrudePrev',
       'SLEEP_Crude95CI', 'STROKE_CrudePrev', 'STROKE_Crude95CI',
       'TEETHLOST_CrudePrev', 'TEETHLOST_Crude95CI', 'Geolocation',
       'census_tract_name', 'depressed_perc', 'black_non_hisp_perc',
       'white_non_hisp_perc', 'hispanic_latino_perc',
       '3_plus_cre_risk_factors_perc', 'below_poverty_level_perc',
       'no_health_insurance_perc', 'male_perc', 'female_perc',
       'income_inequality_gini_index', 'hs_grad_perc',
       'households_no_vehicle_perc', 'households_w_internet_perc',
       'walkability_score', 'GEOID', 'neighborhood_name'],
      dtype='object')
Code
# Resetting defaults and import plotly libraries
import plotly.io as pio
pio.renderers.default = "browser"
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"

# isolate only columns with CrudePrev in the name
dc_health_df_prev = dc_health_df.filter(regex='CrudePrev')
df = dc_health_df_prev



# Rename columns
df = df.rename(columns={'ACCESS2_CrudePrev': '% of Adults without Health Insurance', 
                        'ARTHRITIS_CrudePrev': '% of Adults with Arthritis', 
                        'BINGE_CrudePrev': '% of Adults who Binge Drink',
                        'BPHIGH_CrudePrev': '% of Adults with High Blood Pressure',
                        'BPMED_CrudePrev': '% of Adults with High Blood Pressure who take Blood Pressure Medication',
                        'CANCER_CrudePrev': '% of Adults who were Diagnosed with Cancer',
                        'CASTHMA_CrudePrev': '% of Adults who were Diagnosed with Asthma',
                        'CERVICAL_CrudePrev': '% of Women who had a Pap Smear in the Past 3 Years',
                        'CHD_CrudePrev': '% of Adults who were Diagnosed with Coronary Heart Disease',
                        'CHECKUP_CrudePrev': '% of Adults who had a Routine Checkup in the Past Year',
                        'CHOLSCREEN_CrudePrev': '% of Adults who had Cholesterol Checked in the Past 5 Years',
                        'COLON_SCREEN_CrudePrev': '% of Adults who had a Colonoscopy or similar test in the Past 10 Years',
                        'COPD_CrudePrev': '% of Adults who were Diagnosed with COPD (Chronic Obstructive Pulmonary Disease)',
                        'COREM_CrudePrev': '% Prevalence of Older Adult Men aged >=65 years who are up to date on preventative health',
                        'COREW_CrudePrev': '% Prevalence of Older Adult Women aged >=65 years who are up to date on preventative health',
                        'CSMOKING_CrudePrev': '% of Adults who Currently Smoke',
                        'DENTAL_CrudePrev': '% of Adults who had a Dental Visit in the Past Year',
                        'DEPRESSION_CrudePrev': '% of Adults who were Diagnosed with Depression',
                        'DIABETES_CrudePrev': '% of Adults who were Diagnosed with Diabetes',
                        'GHLTH_CrudePrev': '% of Adults who reported their Health as not Good',
                        'HIGHCHOL_CrudePrev': '% of Adults who were Diagnosed with High Cholesterol',
                        'KIDNEY_CrudePrev': '% of Adults who were Diagnosed with Kidney Disease',
                        'LPA_CrudePrev': '% of Adults who are Physically Inactive', 
                        'MAMMOUSE_CrudePrev': '% Women aged 50-74 years who had a Mammogram in the Past 2 Years',
                        'MHLTH_CrudePrev': '% of Adults who reported their Mental Health as not Good',
                        'OBESITY_CrudePrev': '% of Adults who were Obese',
                        'PHLTH_CrudePrev': '% of Adults who reported their Physical Health as not Good',
                        'SLEEP_CrudePrev': '% of Adults who reported their Sleep as not Good',
                        'STROKE_CrudePrev': '% of Adults who were Diagnosed with Stroke',
                        'TEETHLOST_CrudePrev': '% of Adults who have lost all of their Natural Teeth'})

# list of health metrics for drop down menu
column_names = df.columns

# Creating the initial scatter plot
fig = go.Figure(go.Scatter(x=df[column_names[0]], y=df[column_names[1]], mode='markers'))

# Label axes
fig.update_xaxes(title_text='X Axis')
fig.update_yaxes(title_text='Y Axis')

# Setting the range for x and y axes
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(range=[0, 100])

for col in column_names:
    for col2 in column_names:
        fig.add_trace(go.Scatter(x=df[col], y=df[col2], mode='markers', name=col + ' vs ' + col2, visible=False))


# Create a function to update the visibility of the traces based on selected columns
def update_visibility(selected_col, selected_col2):
    for i, trace in enumerate(fig.data):
        trace.visible = (trace.name == selected_col + ' vs ' + selected_col2)

# Create the drop-down menus for x (col) and y (col2) axes of the scatter plot
col_dropdown = [{'label': col, 'value': col} for col in column_names]
col2_dropdown = [{'label': col2, 'value': col2} for col2 in column_names]

# Define the dropdown menu for x-axis
button_layer_1_height = 1.08

# Define the dropdown menu for y-axis
y_axis_dropdown = go.layout.Updatemenu(
    buttons=list([dict(args=[{'y': [df[col2]]}, update_visibility(col, col2)], label=col2, method='update') for col2 in column_names]),
    direction="down",
    pad={"r": 10, "t": 10},
    showactive=True,
    x=0.06,
    xanchor="left",
    y=button_layer_1_height,
    yanchor="top"
)

# Update the layout to include the dropdown menus
fig.update_layout(
    updatemenus=[y_axis_dropdown]
)

# Label axes
fig.update_xaxes(title_text='X Axis')
fig.update_yaxes(title_text='Y Axis')

# Setting the range for x and y axes
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(range=[0, 100])

# Update plot sizing
fig.update_layout(
    width=900,
    height=900,
    autosize=False,
    #margin=dict(t=100, b=0, l=0, r=0),
)

# add annotations
fig.update_layout(
    annotations=[
        dict(
            text="X Axis: Walkability",
            x=0,
            xref="paper",
            y=button_layer_1_height + 0.025,
            yref="paper",
            align="left",
            showarrow=False
        ),
        dict(
            text="Y Axis:",
            x=0,
            xref="paper",
            y=button_layer_1_height - 0.025,
            yref="paper",
            align="left",
            showarrow=False
        )
    ]
)

# Create a function to update the visibility of the traces based on selected columns
def update_visibility(selected_col, selected_col2):
    for i, trace in enumerate(fig.data):
        trace.visible = (trace.name == selected_col + ' vs ' + selected_col2)

# Display the scatter plot with dropdown menus
fig.show()